########################    ANALYSIS OF ANES_EGSS4 DATA    ##############################################
##  February 2012; Total cases are 1314, Complete interviews are 1253
################################################################################################
## CLEAN UP
rm(list=ls());

################################################################################################
## LODING FUNCTIONS
source("/knowledge_functions.R");

## LOAD THE DATA
egg4 <- read.csv(file="/anes_specialstudies_2012egss4.csv", header=TRUE);
dim(egg4);


###########################    TABLE A1    ###########################
##  Sum of the Number of Correct Responses Excluding The Given Item
res.data <- egg4[,c("re_zh1","re_zh2","re_zh3","re_zh4","c4_zh1","c4_zh2","c4_zh3","c4_zh4")];

##  Distribution of Choice Options: Chief Justice
res.data$n.correct.rm1 <- rowSums(res.data[,c(2:4)], na.rm=TRUE);
ctable <- crosstab(res.data, row.vars="n.correct.rm1", col.vars="c4_zh1", type="f");
ctable$table;

ct <- ctable$table;
ct <- cbind(ct[,4:7], ct[,1], ct[,8]);
ct[1,6] <- ct[1,6] - 55;
colnames(ct) <- c("John Roberts", "David Cole", "Anthony Kennedy", "Larry Thompson", "No Answer", "Sum");

ct;
round(ct/ct[,6]*100, 2);


# UK Premier
res.data$n.correct.rm2 <- rowSums(res.data[,c(1,3:4)], na.rm=TRUE);
ctable <- crosstab(res.data, row.vars="n.correct.rm2", col.vars="c4_zh2", type="f");
ctable$table;

ct <- ctable$table;
ct <- cbind(ct[,4:7], ct[,1], ct[,8]);
ct[1,6] <- ct[1,6] - 55;
colnames(ct) <- c("David Cameron", "Nick Clegg", "Tony Hayward", "Richard Branson", "No Answer", "Sum");

ct;
round(ct/ct[,6]*100, 2);


# Speaker of HOR
res.data$n.correct.rm3 <- rowSums(res.data[,c(1:2,4)], na.rm=TRUE);
ctable <- crosstab(res.data, row.vars="n.correct.rm3", col.vars="c4_zh3", type="f");
ctable$table;

ct <- ctable$table;
ct <- cbind(ct[,4:7], ct[,1], ct[,8]);
ct[1,6] <- ct[1,6] - 55;
colnames(ct) <- c("John Boehner", "Harry Reid", "Eric Holder", "Mitt Romney", "No Answer", "Sum");

ct;
round(ct/ct[,6]*100, 2);


# Least Spending
res.data$n.correct.rm4 <- rowSums(res.data[,c(1:3)], na.rm=TRUE);
ctable <- crosstab(res.data, row.vars="n.correct.rm4", col.vars="c4_zh4", type="f");
ctable$table;

ct <- ctable$table;
ct <- cbind(ct[,4:7], ct[,1], ct[,8]);
ct[1,6] <- ct[1,6] - 55;
colnames(ct) <- c("Foreign Aid", "Medicare", "National Defense", "Social Security", "No Answer", "Sum");

ct;
round(ct/ct[,6]*100, 2);


################################################################################################
########################    DATA ANALYSIS FOR ALL ONLY MULTIPLE-CHOICE ITEMS    ###########
## LOAD PACKAGES
library(R2jags);

########################    2PL-IRT MODEL    #######################
## SET THE SEED, WHICH TELLS US WHERE THE RANDOM PROCESS STARTS.
set.seed(07102014);

## SPECIFY DATA PARAMETERS
y <- res.matrix;
N <- dim(y)[1];
K <- dim(y)[2];

data1 <- list("y", "N", "K");

# INITIAL VALUES; LARGE DISPREAD
inits1 <- list(
	list(theta=rep(0,N), alpha=rep(0,K), tau.alpha=0.1, beta=rep(5,K) ),
	list(theta=rep(-3,N), alpha=rep(3,K), tau.alpha=1, beta=rep(0.1,K) ),
	list(theta=rep(3,N), alpha=rep(-3,K), tau.alpha=5, beta=rep(1,K) )
    );

parameters1 <- c("theta", "alpha", "sigma.alpha", "beta");


##  MARKOV CHAIN MONTE CARLO
n.chains <- length(inits1);
n.iteration <- 100000;
frac.discard <- 0.5;  # FRACTION OF SAMPLES DISCARDED
n.discard <- n.iteration*frac.discard; n.discard;  # BURNIN-IN PERIOD
n.thining <- 10;
n.saved.chin <- (n.iteration - n.discard)/n.thining; n.saved.chin;  # SAMPLES AFTER THINING SAVED FOR EACH CHAIN
n.saved.chin*n.chains;  # TOTAL SAMPLES SAVED


## HAND-OFF TO JAGS
bml1 <- jags(model.file="irt_2pl.R",
             data = data1,
             inits = inits1,
             parameters.to.save = parameters1,
             n.chains = n.chains,   # (DEFAULT: 1)
             n.iter = n.iteration,
             n.burnin = n.discard,
             n.thin = n.thining,
             DIC = TRUE
             );

print(bml1);


##  PROCESS THE MCMC RESULTS
mcmc.out <- as.mcmc(bml1);


##  SAVE THE RESULTS
out.matrix <- as.matrix(mcmc.out);
dim(out.matrix);
#out.matrix[1,1:50];


alpha <- out.matrix[,c(1:4,10)];
beta <- out.matrix[,c(5:8)];
theta <- out.matrix[,11:1324];

out.para <- cbind(alpha, beta);
dim(out.para);


##  Order "theta" by 1,2,3,... rather than 1,10,100,...
step1 <- sapply(strsplit(x=colnames(theta), split='[', fixed=TRUE), function(x) (x[2]));
step2 <- sapply(strsplit(x=step1, split=']', fixed=TRUE), function(x) (x[1]));
step3 <- data.frame(index=1:length(colnames(theta)), theta=as.numeric(step2));
index <- step3[order(step3[,2]), ][,1];

theta.sort <- theta[,index];


##  SAVE THE RESULTS
#write.csv(out.para, "/2pl_para_anes.csv", row.names=FALSE);
#write.csv(theta.sort, "/2pl_theta_anes.csv", row.names=FALSE);


########################    3PL-IRT MODEL    #######################
## SET THE SEED, WHICH TELLS US WHERE THE RANDOM PROCESS STARTS.
set.seed(07102014);

##
guess.ind <- c(1,1,1,1);  # INDICATES WHICH ITEMS HAVE A GUESSING PARAMETER

data1 <- list("y", "N", "K", "guess.ind");

# INITIAL VALUES; LARGE DISPREAD
inits1 <- list(
	list(theta=rep(0,N), alpha=rep(0,K), tau.alpha=0.1, beta=rep(5,K), gamma.star=rep(0.1,K) ),
	list(theta=rep(-3,N), alpha=rep(3,K), tau.alpha=1, beta=rep(0.1,K), gamma.star=rep(0.5,K) ),
	list(theta=rep(3,N), alpha=rep(-3,K), tau.alpha=5, beta=rep(1,K), gamma.star=rep(0.9,K) )
    );

parameters1 <- c("theta", "alpha", "sigma.alpha", "beta", "gamma");


##  MARKOV CHAIN MONTE CARLO
n.chains <- length(inits1);
n.iteration <- 100000;
frac.discard <- 0.5;  # FRACTION OF SAMPLES DISCARDED
n.discard <- n.iteration*frac.discard; n.discard;  # BURNIN-IN PERIOD
n.thining <- 10;
n.saved.chin <- (n.iteration - n.discard)/n.thining; n.saved.chin;  # SAMPLES AFTER THINING SAVED FOR EACH CHAIN
n.saved.chin*n.chains;  # TOTAL SAMPLES SAVED


## HAND-OFF TO JAGS
bml1 <- jags(model.file="irt_3pl.R",
             data = data1,
             inits = inits1,
             parameters.to.save = parameters1,
             n.chains = n.chains,   # (DEFAULT: 1)
             n.iter = n.iteration,
             n.burnin = n.discard,
             n.thin = n.thining,
             DIC = TRUE
             );

print(bml1);


##  PROCESS THE MCMC RESULTS
mcmc.out <- as.mcmc(bml1);

out.matrix <- as.matrix(mcmc.out);
dim(out.matrix);
#out.matrix[1,1:50];


alpha <- out.matrix[,c(1:4,14)];
beta <- out.matrix[,5:8];
gamma <- out.matrix[,10:13];
theta <- out.matrix[,15:1328];

out.para <- cbind(alpha, beta, gamma);
dim(out.para);


##  Order "theta" by 1,2,3,... rather than 1,10,100,...
step1 <- sapply(strsplit(x=colnames(theta), split='[', fixed=TRUE), function(x) (x[2]));
step2 <- sapply(strsplit(x=step1, split=']', fixed=TRUE), function(x) (x[1]));
step3 <- data.frame(index=1:length(colnames(theta)), theta=as.numeric(step2));
index <- step3[order(step3[,2]), ][,1];

theta.sort <- theta[,index];


##  SAVE THE RESULTS
#write.csv(out.para, "/3pl_para_anes.csv", row.names=FALSE);
#write.csv(theta.sort, "/3pl_theta_anes.csv", row.names=FALSE);


########################    2PL-GUESSING MODEL    #######################
## SET THE SEED, WHICH TELLS US WHERE THE RANDOM PROCESS STARTS.
set.seed(07102014);

##
M <- 4; # 4 choice options
guess.ind <- c(1,1,1,1);  # INDICATES WHICH ITEMS HAVE A GUESSING PARAMETER

data1 <- list("y", "N", "K", "guess.ind", "M");

# INITIAL VALUES; LARGE DISPREAD
inits1 <- list(
	list(theta=rep(0,N), alpha=rep(0,K), tau.alpha=0.1, beta=rep(5,K), b.star=rep(0.1,K) ),
	list(theta=rep(-3,N), alpha=rep(3,K), tau.alpha=1, beta=rep(0.1,K), b.star=rep(0.5,K) ),
	list(theta=rep(3,N), alpha=rep(-3,K), tau.alpha=5, beta=rep(1,K), b.star=rep(0.9,K) )
	);

parameters1 <- c("theta", "alpha", "beta", "sigma.alpha", "b");


##  MARKOV CHAIN MONTE CARLO
n.chains <- length(inits1);
n.iteration <- 100000;
frac.discard <- 0.5;  # FRACTION OF SAMPLES DISCARDED
n.discard <- n.iteration*frac.discard; n.discard;  # BURNIN-IN PERIOD
n.thining <- 10;
n.saved.chin <- (n.iteration - n.discard)/n.thining; n.saved.chin;  # SAMPLES AFTER THINING SAVED FOR EACH CHAIN
n.saved.chin*n.chains;  # TOTAL SAMPLES SAVED


## HAND-OFF TO JAGS
bml1 <- jags(model.file="irt_guess.R",
             data = data1,
             inits = inits1,
             parameters.to.save = parameters1,
             n.chains = n.chains,   # (DEFAULT: 1)
             n.iter = n.iteration,
             n.burnin = n.discard,
             n.thin = n.thining,
             DIC = TRUE
             );

print(bml1);


##  PROCESS THE MCMC RESULTS
mcmc.out <- as.mcmc(bml1);

out.matrix <- as.matrix(mcmc.out);
dim(out.matrix);
#out.matrix[1,1:50];


alpha <- out.matrix[,c(1:4,14)];
beta <- out.matrix[,9:12];
b <- out.matrix[,5:8];
theta <- out.matrix[,15:1328];

out.para <- cbind(alpha, beta, b);
dim(out.para);


##  Order "theta" by 1,2,3,... rather than 1,10,100,...
step1 <- sapply(strsplit(x=colnames(theta), split='[', fixed=TRUE), function(x) (x[2]));
step2 <- sapply(strsplit(x=step1, split=']', fixed=TRUE), function(x) (x[1]));
step3 <- data.frame(index=1:length(colnames(theta)), theta=as.numeric(step2));
index <- step3[order(step3[,2]), ][,1];

theta.sort <- theta[,index];


##  SAVE THE RESULTS
#write.csv(out.para, "/guess_para_anes.csv", row.names=FALSE);
#write.csv(theta.sort, "/guess_theta_anes.csv", row.names=FALSE);
#########################################################################

######################################################
##
##  LOAD AND REOPORT THE RESULTS
##
######################################################
## LODING FUNCTIONS
source("/knowledge_functions.R");

## LOAD THE DATA
mod1 <- read.csv(file="2pl_para_anes.csv", header=TRUE);
dim(mod1);

mod2 <- read.csv(file="3pl_para_anes.csv", header=TRUE);
dim(mod2);

mod3 <- read.csv(file="guess_para_anes.csv", header=TRUE);
dim(mod3);


########################  PLOT ESTIMATES  #######################
par.mod1 <- cbind(round(apply(mod1, 2, mean), 3), HPDint(mod1, prob=0.90)$HPDinterval);
par.mod2 <- cbind(round(apply(mod2, 2, mean), 3), HPDint(mod2, prob=0.90)$HPDinterval);
par.mod3 <- cbind(round(apply(mod3, 2, mean), 3), HPDint(mod3, prob=0.90)$HPDinterval);

var.label <- c("Chief Justice", "UK Primier", "Speaker of HOR", "Least Spending");

##
par(mfrow=c(1,1), mar=c(2,4,5,2), oma=c(2,2,0,0));

########################  FIGURE 3-1: ITEM DIFFICULTY FOR ITEMS 4-7  #######################
plot(1:17 ~ 0, type="n", xlab="", ylab="", main="Item Difficulty", axes=FALSE, xlim=c(-5,2), cex.main=1.1);

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,2,1) ); # X-axis
axis(side=3, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,2,1) ); # X-axis

axis(side=2, las=1, tick=FALSE, cex.axis=0.8, at=c(3,7,11,15), label=rev(var.label) ); # Y-axis

abline(v=seq(-6,6,0.01), col="gray97", lty=1, lwd=1); # background

abline(h=c(2:4,6:8,10:12,14:16), lty=2, lwd=0.5, col="white");

abline(v=seq(-5,5,1), col="white", lty=1, lwd=2);


# 90% credible intervals
segments(par.mod1[1:4,2], c(16,12,8,4), par.mod1[1:4,3], c(16,12,8,4), lty=6, col="gray20", lwd=3);
segments(par.mod2[1:4,2], c(15,11,7,3), par.mod2[1:4,3], c(15,11,7,3), lty=1, col="black", lwd=3);
segments(par.mod3[1:4,2], c(14,10,6,2), par.mod3[1:4,3], c(14,10,6,2), lty=2, col="gray70", lwd=3);

# Means
points(x=par.mod1[1:4,1], y=c(16,12,8,4), pch=23, cex=1.2, col="gray20", bg="gray20");
points(x=par.mod2[1:4,1], y=c(15,11,7,3), pch=22, cex=1.2, col="black", bg="black");
points(x=par.mod3[1:4,1], y=c(14,10,6,2), pch=21, cex=1.2, col="gray70", bg="gray70");

legend(x=-5, y=17, legend=c("2PL", "3PL", "2PL-G"), lty=c(6,1,2), pch=c(18,15,16), col=c("gray20","black","gray70"), cex=1, horiz=FALSE, lwd=2, bg="gray97", bty="n" );


########################  FIGURE 3-2: ITEM DISCRIMINATION FOR ITEMS 4-7  #######################
plot(1:17 ~ 0, type="n", xlab="", ylab="", main="Item Discrimination", axes=FALSE, xlim=c(0,5), cex.main=1.1);

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,5,1) ); # X-axis
axis(side=3, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,5,1) ); # X-axis

axis(side=2, las=1, tick=FALSE, cex.axis=0.8, at=c(3,7,11,15), label=rev(var.label) ); # Y-axis

abline(v=seq(-6,6,0.01), col="gray97", lty=1, lwd=1);

abline(h=c(2:4,6:8,10:12,14:16), lty=2, lwd=0.5, col="white");

abline(v=seq(-5,5,0.5), col="white", lty=1, lwd=1);
abline(v=seq(-5,5,1), col="white", lty=1, lwd=2);


# 90% credible intervals
segments(par.mod1[6:9,2], c(16,12,8,4), par.mod1[6:9,3], c(16,12,8,4), lty=6, col="gray20", lwd=3);
segments(par.mod2[6:9,2], c(15,11,7,3), par.mod2[6:9,3], c(15,11,7,3), lty=1, col="black", lwd=3);
segments(par.mod3[6:9,2], c(14,10,6,2), par.mod3[6:9,3], c(14,10,6,2), lty=2, col="gray70", lwd=3);

# Means
points(x=par.mod1[6:9,1], y=c(16,12,8,4), pch=23, cex=1.2, col="gray20", bg="gray20");
points(x=par.mod2[6:9,1], y=c(15,11,7,3), pch=22, cex=1.2, col="black", bg="black");
points(x=par.mod3[6:9,1], y=c(14,10,6,2), pch=21, cex=1.2, col="gray70", bg="gray70");

legend(x=0, y=17, legend=c("2PL", "3PL", "2PL-G"), lty=1, pch=c(18,15,16), col=c("gray20","black","gray70"), cex=1, horiz=FALSE, lwd=2, bg="gray97", bty="n" );


#####################################################################
######################    Guessing Property    ######################
par(mfrow=c(1,1), mar=c(2,4,5,2), oma=c(2,2,0,0));

########################  FIGURE 4-1: GUESSING PARAMETERS FOR ITEMS 4-7  #######################
plot(1.5:5.5 ~ 0, type="n", xlab="", ylab="", main="Guessing Parameter of 3PL", axes=FALSE, xlim=c(0,1), cex.main=1.1);

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.2) ); # X-axis
axis(side=3, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.2) ); # X-axis

axis(side=2, las=1, tick=FALSE, cex.axis=0.8, at=2:5, label=rev(var.label) ); # Y-axis

abline(v=seq(-3,1,0.001), col="gray97", lty=1, lwd=1); # background

abline(h=2:5, lty=2, lwd=1, col="white");

abline(v=seq(0,1,0.2), col="white", lty=1, lwd=2);


# 90% credible intervals
segments(par.mod2[10:13,2], 5:2, par.mod2[10:13,3], 5:2, lty=1, col="black", lwd=3);

# Means
points(x=par.mod2[10:13,1], y=5:2, pch=22, cex=1.2, col="black", bg="black");


########################  FIGURE 4-2: GUESSING PROPERTIES FOR ITEMS 4-7  #######################
##  b*alpha
sub.mod3 <- mod3[,c(1:4,10:13)];

g.comp <- sub.mod3[,5:8]*sub.mod3[,1:4];
g.comp.mu <- cbind(round(apply(g.comp, 2, mean), 3), HPDint(g.comp, prob=0.90)$HPDinterval);

##
plot(1.5:5.5 ~ 0, type="n", xlab="", ylab="", main="Guessing Component of 2PL-G", axes=FALSE, xlim=c(-3,1), cex.main=1.1);

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-3,1,1) ); # X-axis
axis(side=3, las=1, tick=FALSE, cex.axis=1.2, at=seq(-3,1,1) ); # X-axis

axis(side=2, las=1, tick=FALSE, cex.axis=0.8, at=2:5, label=rev(var.label) ); # Y-axis

abline(v=seq(-3,1,0.001), col="gray97", lty=1, lwd=1); # background

abline(h=2:5, lty=2, lwd=1, col="white");

abline(v=seq(-3,1,1), col="white", lty=1, lwd=2);

# 90% credible intervals
segments(g.comp.mu[1:4,2], 5:2, g.comp.mu[1:4,3], 5:2, col="black", lwd=3);

# Means
points(x= g.comp.mu[1:4,1], y=5:2, pch=21, cex=1.2, col="black", bg="black");


#########################################################################################################
########################  PLOT ITEM CHARACTERISTIC CURVES  #######################
## LOAD ITEM PARAMETER DATA
close.mod2 <- par.mod2[c(1:4,6:13),1];
close.mod3 <- par.mod3[c(1:4,6:13),1];


## GENERATE SIMULATED DATA FOR FIGURE
set.seed(07102014);

z <- seq(from=-5, to=5, by=0.1);
logis.cdf <- plogis(z);

###################  PARAMETERS OF PLOTS
par(mfrow=c(1,1),mar=c(4,4,1,1),oma=c(1,1,1,1));


##########################  3PL-IRT MODEL  ##########################
###################  FIGURE 5-1: MAKE PLOTS FOR GUESSING PROBABILITY
plot(logis.cdf ~ z, type="n", xlab=expression(theta), ylab="Prob. of Successful Guess", main="3PL Model", xlim=c(-5, 5), ylim=c(0, 1), yaxt="n", xaxt="n", bty="n");

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,5,1) ); # X-axis
axis(side=2, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.25)); # Y-axis

abline(v=seq(-6,6,0.01), col="gray97", lty=1, lwd=1);

abline(h=seq(0,1,0.25), col="gray100", lty=1, lwd=1);
abline(h=c(0,0.5,1), col="white", lty=1, lwd=2);

abline(v=seq(-5,5,1), col="white", lty=1, lwd=2);


curve(irt.3pl(x, alpha= close.mod2[1], beta= close.mod2[5], gamma= close.mod2[9])[,"prob.guess"], add=TRUE, col="black", lty=1, lwd=2);
curve(irt.3pl(x, alpha= close.mod2[2], beta= close.mod2[6], gamma = close.mod2[10])[,"prob.guess"], add=TRUE, col="gray25", lty=2, lwd=2);
curve(irt.3pl(x, alpha= close.mod2[3], beta= close.mod2[7], gamma = close.mod2[11])[,"prob.guess"], add=TRUE, col="gray50", lty=3, lwd=2);
curve(irt.3pl(x, alpha= close.mod2[4], beta= close.mod2[8], gamma = close.mod2[12])[,"prob.guess"], add=TRUE, col="gray75", lty=4, lwd=2);


exp.item1 <- expression(paste("Chief Justice ", alpha,"=-0.377, ", beta,"=2.373, ", c,"=0.360"));
exp.item2 <- expression(paste("UK Primier ", alpha,"=0.227, ", beta,"=2.370, ", c,"=0.167"));
exp.item3 <- expression(paste("Speaker of HOR ", alpha,"=-0.516, ", beta,"=3.113, ", c,"=0.362"));
exp.item4 <- expression(paste("Least Spending ", alpha,"=0.646, ", beta,"=1.108, ", c,"=0.097"));

legend(x=-4, y=1, legend=c(exp.item1, exp.item2, exp.item3, exp.item4), lty=c(1,2,3,4), col=c("black","gray25","gray50","gray75"), cex=1, text.col=c("black","gray25","gray50","gray75"), horiz=FALSE, lwd=2, bg="gray97", bty="n" );


###################  FIGURE 5-2: MAKE PLOTS FOR ICC
plot(logis.cdf ~ z, type="n", xlab=expression(theta), ylab="Prob. of Correct Response", main="3PL Model", xlim=c(-5, 5), ylim=c(0, 1), yaxt="n", xaxt="n", bty="n");

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,5,1) ); # X-axis
axis(side=2, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.25)); # Y-axis

abline(v=seq(-6,6,0.01), col="gray97", lty=1, lwd=1);

abline(h=seq(0,1,0.25), col="gray100", lty=1, lwd=1);
abline(h=c(0,0.5,1), col="white", lty=1, lwd=2);

abline(v=seq(-5,5,1), col="white", lty=1, lwd=2);


curve(irt.3pl(x, alpha= close.mod2[1], beta= close.mod2[5], gamma = close.mod2[9])[,"prob.correct"], add=TRUE, col="black", lty=1, lwd=2);
curve(irt.3pl(x, alpha= close.mod2[2], beta= close.mod2[6], gamma = close.mod2[10])[,"prob.correct"], add=TRUE, col="gray25", lty=2, lwd=2);
curve(irt.3pl(x, alpha= close.mod2[3], beta= close.mod2[7], gamma = close.mod2[11])[,"prob.correct"], add=TRUE, col="gray50", lty=3, lwd=2);
curve(irt.3pl(x, alpha= close.mod2[4], beta= close.mod2[8], gamma = close.mod2[12])[,"prob.correct"], add=TRUE, col="gray75", lty=4, lwd=2);


legend(x=-2., y=0.13, legend=c(exp.item1, exp.item2, exp.item3, exp.item4), lty=c(1,2,3,4), col=c("black","gray25","gray50","gray75"), cex=1, text.col=c("black","gray25","gray50","gray75"), horiz=FALSE, lwd=2, bty="n" );


###################  FIGURE 5-3: MAKE PLOTS FOR GUESSING PROBABILITY
plot(logis.cdf ~ z, type="n", xlab=expression(theta), ylab="Prob. of Successful Guess", main="2PL-G Model", xlim=c(-5, 5), ylim=c(0, 1), yaxt="n", xaxt="n", bty="n");

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,5,1) ); # X-axis
axis(side=2, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.25)); # Y-axis

abline(v=seq(-6,6,0.01), col="gray97", lty=1, lwd=1);

abline(h=seq(0,1,0.25), col="gray100", lty=1, lwd=1);
abline(h=c(0,0.5,1), col="white", lty=1, lwd=2);

abline(v=seq(-5,5,1), col="white", lty=1, lwd=2);


curve(prob.guess(x, alpha= close.mod3[1], beta= close.mod3[5], b= close.mod3[9]), add=TRUE, col="black", lty=1, lwd=2);
curve(prob.guess(x, alpha= close.mod3[2], beta= close.mod3[6], b= close.mod3[10]), add=TRUE, col="gray25", lty=2, lwd=2);
curve(prob.guess(x, alpha= close.mod3[3], beta= close.mod3[7], b= close.mod3[11]), add=TRUE, col="gray50", lty=3, lwd=2);
curve(prob.guess(x, alpha= close.mod3[4], beta= close.mod3[8], b= close.mod3[12]), add=TRUE, col="gray75", lty=4, lwd=2);


exp.item1 <- expression(paste("Chief Justice ", alpha,"=-0.673, ", beta,"=1.662, ", b,"=0.458"));
exp.item2 <- expression(paste("UK Primier ", alpha,"=0.348, ", beta,"=2.716, ", b,"=0.403"));
exp.item3 <- expression(paste("Speaker of HOR ", alpha,"=-0.759, ", beta,"=2.387, ", b,"=0.451"));
exp.item4 <- expression(paste("Least Spending ", alpha,"=1.000, ", beta,"=1.260, ", b,"=0.546"));

legend(x=-4, y=0.9, legend=c(exp.item1, exp.item2, exp.item3, exp.item4), lty=c(1,2,3,4), col=c("black","gray25","gray50","gray75"), cex=1, text.col=c("black","gray25","gray50","gray75"), horiz=FALSE, lwd=2, bty="n" );


###################  FIGURE 5-4: MAKE PLOTS FOR ICC
plot(logis.cdf ~ z, type="n", xlab=expression(theta), ylab="Prob. of Correct Response", main="2PL-G Model", xlim=c(-5, 5), ylim=c(0, 1), yaxt="n", xaxt="n", bty="n");

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-5,5,1) ); # X-axis
axis(side=2, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.25)); # Y-axis

abline(v=seq(-6,6,0.01), col="gray97", lty=1, lwd=1);

abline(h=seq(0,1,0.25), col="gray100", lty=1, lwd=1);
abline(h=c(0,0.5,1), col="white", lty=1, lwd=2);

abline(v=seq(-5,5,1), col="white", lty=1, lwd=2);


curve(irt.guess1(x, alpha= close.mod3[1], beta= close.mod3[5], b= close.mod3[9])[,"prob.correct"], add=TRUE, col="black", lty=1, lwd=2);
curve(irt.guess1(x, alpha= close.mod3[2], beta= close.mod3[6], b= close.mod3[10])[,"prob.correct"], add=TRUE, col="gray25", lty=2, lwd=2);
curve(irt.guess1(x, alpha= close.mod3[3], beta= close.mod3[7], b= close.mod3[11])[,"prob.correct"], add=TRUE, col="gray50", lty=3, lwd=2);
curve(irt.guess1(x, alpha= close.mod3[4], beta= close.mod3[8], b= close.mod3[12])[,"prob.correct"], add=TRUE, col="gray75", lty=4, lwd=2);


legend(x=-2, y=0.15, legend=c(exp.item1, exp.item2, exp.item3, exp.item4), lty=c(1,2,3,4), col=c("black","gray25","gray50","gray75"), cex=1, text.col=c("black","gray25","gray50","gray75"), horiz=FALSE, lwd=2, bty="n" );



#######################################################################################
######################    Comparisons of Latent Traits    ######################
## LOAD REQUIRED FUNCTIONS
source("/knowledge_functions.R");

## LOAD THE DATA
theta.2pl <- read.csv(file="2pl_theta_anes.csv", header=TRUE);
dim(theta.2pl);
mu.2pl <- apply(theta.2pl, 2, mean);

theta.3pl <- read.csv(file="3pl_theta_anes.csv", header=TRUE);
dim(theta.3pl);
mu.3pl <- apply(theta.3pl, 2, mean);

theta.g <- read.csv(file="guess_theta_anes.csv", header=TRUE);
dim(theta.g);
mu.g <- apply(theta.g, 2, mean);

sort.index <- names(sort(mu.g));


###################  FIGURE 6-1: densities of ideal point estimates
library(scales);

###################  PARAMETERS OF PLOTS
par(mfrow=c(1,1), mar=c(4,4,3,1), oma=c(1,1,1,1));

plot(mu.g, dnorm(mu.g), type="n", xlab="Political Knowledge", ylab="Density", main="Distributions of Political Knowledge", xlim=c(-2,2), ylim=c(0,1), yaxt="n", xaxt="n", bty="n");

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(-2,2,0.5) ); # X-axis
axis(side=2, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.5) ); # Y-axis

abline(v=seq(-2.5,2.5,0.01), col="gray97", lty=1, lwd=1);

abline(h=c(0,0.5,1), col="white", lty=1, lwd=3);

abline(v=seq(-2,2,0.5), col="gray100", lty=1, lwd=1);
abline(v=seq(-2,2,1), col="white", lty=1, lwd=3);

# 2PL-G
lines(density(mu.g, adjust=2), col="black", lty=1, lwd=2, type="l");
#lines(density(mu.g, adjust=2), col=alpha("black", 0.2), lty=1, lwd=2, type="h");

# 2PL
lines(density(mu.2pl, adjust=2), col="black", lty=2, lwd=2, type="l");
#lines(density(mu.2pl, adjust=2), col=alpha("red", 0.1), lty=1, lwd=2, type="h");

# 3PL
lines(density(mu.3pl, adjust=2), col="black", lty=3, lwd=2, type="l");
#lines(density(mu.3pl, adjust=2), col=alpha("blue", 0.1), lty=1, lwd=2, type="h");

#
legend(x=0.5, y=0.75, legend=c("2PL-G", "3PL", "2PL"), lty=c(1,3,2), cex=1, lwd=2, bty="n" );


#######################################################################################
######################    Comparisons of Models    ######################
## LOAD THE ESTIMATES OF ITEM PARAMETERS
mod1 <- read.csv(file="2pl_para_anes.csv", header=TRUE);
dim(mod1);

mod2 <- read.csv(file="3pl_para_anes.csv", header=TRUE);
dim(mod2);

mod3 <- read.csv(file="guess_para_anes.csv", header=TRUE);
dim(mod3);


## LOAD THE DATA
egg4 <- read.csv(file="/anes_specialstudies_2012egss4.csv", header=TRUE);
dim(egg4);

res.data <- egg4[,c("re_zh1","re_zh2","re_zh3","re_zh4")];
res.matrix <- egg4[,c("re_zh1","re_zh2","re_zh3","re_zh4")];
dim(res.data);

##  Distribution of Choice Options
res.data$n.correct <- rowSums(res.data[,c(1:4)], na.rm=TRUE);

index <- which(res.data$n.correct >= 1 & res.data$n.correct <= 3);


##  2PL: The Proportion of Correct Predictions
n.correct <- data.frame(matrix(NA, nrow=dim(mod1)[1], ncol=8));
colnames(n.correct) <- c("m.q1", "m.q2", "m.q3", "m.q4", "n.q1", "n.q2", "n.q3", "n.q4");

for (i in 1:dim(mod1)[1]) {
	pred.2pl <- pred.rate(response=res.matrix[index,], theta=theta.2pl[i, index], beta=mod1[i,6:9], alpha=mod1[i,1:4], model="2PL");
	
	n.correct[i,1:4] <- pred.2pl$item.match;
	n.correct[i,5:8] <- pred.2pl$n.answer;
}

##  Correct Predictions by Items
n.correct[,9:12] <- n.correct[,1:4]/n.correct[,5:8];

par.mod1 <- cbind(round(apply(n.correct[,9:12], 2, mean), 3), HPDint(n.correct[,9:12], prob=0.90)$HPDinterval);
par.mod1;


##  3PL: The Proportion of Correct Predictions
n.correct2 <- data.frame(matrix(NA, nrow=dim(mod1)[1], ncol=8));
colnames(n.correct) <- c("m.q1", "m.q2", "m.q3", "m.q4", "n.q1", "n.q2", "n.q3", "n.q4");

for (i in 1:dim(mod1)[1]) {
	pred.3pl <- pred.rate(response=res.matrix[index,], theta=theta.3pl[i, index], beta=mod2[i,6:9], alpha=mod2[i,1:4], model="3PL", c=mod2[i,10:13]);
	
	n.correct2[i,1:4] <- pred.3pl$item.match;
	n.correct2[i,5:8] <- pred.3pl$n.answer;
}

##  Correct Predictions by Items
n.correct2[,9:12] <- n.correct2[,1:4]/n.correct2[,5:8];

par.mod2 <- cbind(round(apply(n.correct2[,9:12], 2, mean), 3), HPDint(n.correct2[,9:12], prob=0.90)$HPDinterval);
par.mod2;


##  2PL-G: The Proportion of Correct Predictions
n.correct3 <- data.frame(matrix(NA, nrow=dim(mod1)[1], ncol=8));
colnames(n.correct3) <- c("m.q4", "m.q5", "m.q6", "m.q7", "n.q4", "n.q5", "n.q6", "n.q7");

for (i in 1:dim(mod1)[1]) {
	pred.g <- pred.rate(response=res.matrix[index,], theta=theta.g[i, index], beta=mod3[i,6:9], alpha=mod3[i,1:4], model="2PL-G", b=mod3[i,10:13], M=4);
	
	n.correct3[i,1:4] <- pred.g$item.match;
	n.correct3[i,5:8] <- pred.g$n.answer;
}

##  Correct Predictions by Items
n.correct3[,9:12] <- n.correct3[,1:4]/n.correct3[,5:8];

par.mod3 <- cbind(round(apply(n.correct3[,9:12], 2, mean), 3), HPDint(n.correct3[,9:12], prob=0.90)$HPDinterval);
par.mod3;


########################  PLOT ESTIMATES  #######################
var.label <- c("Chief Justice", "UK Primier", "Speaker of HOR", "Least Spending");

##
par(mfrow=c(1,1), mar=c(2,4,5,2), oma=c(2,2,0,0));

########################  FIGURE 6-2:   #######################
plot(1:17 ~ 0, type="n", xlab="", ylab="", main="Proportions of Correct Prediction", axes=FALSE, xlim=c(0.49,1.01), cex.main=1.1);

axis(side=1, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.1) ); # X-axis
axis(side=3, las=1, tick=FALSE, cex.axis=1.2, at=seq(0,1,0.1) ); # X-axis

axis(side=2, las=1, tick=FALSE, cex.axis=0.8, at=c(3,7,11,15), label=rev(var.label) ); # Y-axis

abline(v=seq(-1,1.05,0.001), col="gray97", lty=1, lwd=1); # background

abline(h=c(2:4,6:8,10:12,14:16), lty=2, lwd=1, col="white");

abline(v=seq(0,1,0.1), col="white", lty=1, lwd=2);
abline(v=seq(0,1,0.05), col="white", lty=1, lwd=1);


# 90% credible intervals
segments(par.mod1[1:4,2], c(16,12,8,4), par.mod1[1:4,3], c(16,12,8,4), lty=6, col="gray20", lwd=3);
segments(par.mod2[1:4,2], c(15,11,7,3), par.mod2[1:4,3], c(15,11,7,3), lty=1, col="black", lwd=3);
segments(par.mod3[1:4,2], c(14,10,6,2), par.mod3[1:4,3], c(14,10,6,2), lty=2, col="gray70", lwd=3);

# Means
points(x=par.mod1[1:4,1], y=c(16,12,8,4), pch=23, cex=1.2, col="gray20", bg="gray20");
points(x=par.mod2[1:4,1], y=c(15,11,7,3), pch=22, cex=1.2, col="black", bg="black");
points(x=par.mod3[1:4,1], y=c(14,10,6,2), pch=21, cex=1.2, col="gray70", bg="gray70");

legend(x=0.1, y=17, legend=c("2PL", "3PL", "2PL-G"), lty=c(6,1,2), pch=c(18,15,16), col=c("gray20","black","gray70"), cex=1, horiz=FALSE, lwd=2, bg="gray97", bty="n" );

